Merging CQC and financial data
# import location level full data
rating<- read_csv(here("cleaned_data","cic_all_ratings_2019.csv"))
finance <- read_csv(here("cleaned_data","cls_finance_2025.csv"))
finance1 <- finance %>%
mutate(id_digit = as.numeric(str_extract(project_id, "\\d+"))) %>%
arrange(id_digit) %>%
mutate(
current_ratio = ifelse(currentliabilities == 0, NA, currentasset / currentliabilities),
dissolved = factor(dissolved))
# checking the largest number for the project_id in the finance data set
summary(finance1$id_digit)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 228.0 460.0 610.1 913.2 5181.0
#merging the data
cic2019 <- rating %>%
left_join(finance1, by = "project_id")
data cleanning
#select relevant columns, rename and relabel
cic_cleaned <- cic2019 %>%
# recode legal form types to be more readable / easier to present
mutate(# inherited = ifelse(inherited == "Y", TRUE, FALSE),
rating = recode(rating,
"Insufficient evidence to rate" = "NA",
"Requires improvement" = "Req improv"),
date = ymd(publication_date)) %>%
# assign order in the rating levels
mutate(rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding")),
social_care = ifelse(type == "Social Care Org", "social care", "healthcare")) %>%
# creating a new dummy variable for facility category
mutate(founded = as.numeric(founded),
year = year(date),
age = year - founded,
Year = factor(year)) %>%
mutate(cls = ifelse(CLS == 1, "CLS", "CLG"),
totalequity = as.numeric(totalequity),
totalequity_std = scale(totalequity, center = TRUE, scale = TRUE)) %>%
mutate(share_size = case_when(
totalissueshares <= 20 ~ "small",
totalissueshares >= 100000 ~ "large",
TRUE ~ NA_character_
))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `founded = as.numeric(founded)`.
## Caused by warning:
## ! NAs introduced by coercion
Check the distribution of the data for share_size
cic_cleaned %>%
count(share_size)
## # A tibble: 3 × 2
## share_size n
## <chr> <int>
## 1 large 23
## 2 small 464
## 3 <NA> 1409
Total counts in full data set
count_full <- cic_cleaned %>%
mutate(overall = ifelse(domain == "Overall", 1, 0)) %>%
summarize(count_provider = n_distinct(provider_name),
count_location = n_distinct(location_name)-1,
count_overall_rating = sum(overall),
count_rating = n())
count_full
## # A tibble: 1 × 4
## count_provider count_location count_overall_rating count_rating
## <int> <dbl> <dbl> <int>
## 1 103 170 551 1896
count_by_form <- cic_cleaned %>%
mutate(overall = ifelse(domain == "Overall", 1, 0)) %>%
group_by(cls) %>%
summarize(count_provider = n_distinct(provider_name),
count_location = n_distinct(location_name)-1,
count_overall_rating = sum(overall),
count_rating = n())
count_by_form
## # A tibble: 3 × 5
## cls count_provider count_location count_overall_rating count_rating
## <chr> <int> <dbl> <dbl> <int>
## 1 CLG 47 56 104 527
## 2 CLS 56 114 447 1354
## 3 <NA> 3 2 0 15
count_by_level_form <- cic_cleaned %>%
mutate(overall = ifelse(domain == "Overall", 1, 0)) %>%
group_by(cls, level) %>%
summarize(count_provider = n_distinct(provider_name),
count_location = n_distinct(location_name),
# the count_location for provider should be manually adjusted to 0
count_overall_rating = sum(overall),
count_rating = n())
## `summarise()` has grouped output by 'cls'. You can override using the `.groups`
## argument.
count_by_level_form
## # A tibble: 5 × 6
## # Groups: cls [3]
## cls level count_provider count_location count_overall_rating count_rating
## <chr> <chr> <int> <int> <dbl> <int>
## 1 CLG location 46 56 75 353
## 2 CLG provider 3 1 29 174
## 3 CLS location 51 114 395 1042
## 4 CLS provider 10 1 52 312
## 5 <NA> location 3 3 0 15
count_by_level <- cic_cleaned %>%
mutate(overall = ifelse(domain == "Overall", 1, 0)) %>%
group_by(level) %>%
summarize(count_provider = n_distinct(provider_name),
count_location = n_distinct(location_name),
# the count_location for provider should be manually adjusted to 0
count_overall_rating = sum(overall),
count_rating = n())
count_by_level
## # A tibble: 2 × 5
## level count_provider count_location count_overall_rating count_rating
## <chr> <int> <int> <dbl> <int>
## 1 location 97 170 470 1410
## 2 provider 13 1 81 486
count_by_level_type <- cic_cleaned %>%
mutate(overall = ifelse(domain == "Overall", 1, 0)) %>%
group_by(social_care, level) %>%
summarize(count_provider = n_distinct(provider_name),
count_location = n_distinct(location_name),
# the count_location for provider should be manually adjusted to 0
count_overall_rating = sum(overall),
count_rating = n())
## `summarise()` has grouped output by 'social_care'. You can override using the
## `.groups` argument.
count_by_level_type
## # A tibble: 3 × 6
## # Groups: social_care [2]
## social_care level count_provider count_location count_overall_rating
## <chr> <chr> <int> <int> <dbl>
## 1 healthcare location 38 82 374
## 2 healthcare provider 13 1 81
## 3 social care location 61 88 96
## # ℹ 1 more variable: count_rating <int>
datasummary_crosstab(
formula = cls ~ rating,
data = cic_cleaned
)
tinytable_teri27jmghitw0ljwdgq
| cls |
|
Inadequate |
Req improv |
Good |
Outstanding |
All |
| CLG |
N |
13 |
50 |
412 |
52 |
527 |
| |
% row |
2.5 |
9.5 |
78.2 |
9.9 |
100.0 |
| CLS |
N |
44 |
98 |
1078 |
134 |
1354 |
| |
% row |
3.2 |
7.2 |
79.6 |
9.9 |
100.0 |
| All |
N |
57 |
151 |
1502 |
186 |
1896 |
| |
% row |
3.0 |
8.0 |
79.2 |
9.8 |
100.0 |
datasummary_crosstab(
formula = cls * spinout ~ rating,
data = cic_cleaned
)
tinytable_d30yies06ylidy0es72y
| cls |
spinout |
|
Inadequate |
Req improv |
Good |
Outstanding |
All |
| CLG |
0 |
N |
10 |
27 |
254 |
32 |
323 |
| |
|
% row |
3.1 |
8.4 |
78.6 |
9.9 |
100.0 |
| |
1 |
N |
3 |
23 |
158 |
20 |
204 |
| |
|
% row |
1.5 |
11.3 |
77.5 |
9.8 |
100.0 |
| CLS |
0 |
N |
25 |
46 |
643 |
92 |
806 |
| |
|
% row |
3.1 |
5.7 |
79.8 |
11.4 |
100.0 |
| |
1 |
N |
19 |
52 |
435 |
42 |
548 |
| |
|
% row |
3.5 |
9.5 |
79.4 |
7.7 |
100.0 |
| |
All |
N |
57 |
151 |
1502 |
186 |
1896 |
| |
|
% row |
3.0 |
8.0 |
79.2 |
9.8 |
100.0 |
regression analysis
models without finance variable
model_order_overall <- clm(rating ~ cls + spinout + social_care + age + dissolved,
data = filter(cic_cleaned, domain == "Overall"),
link = "logit")
model_order_safe <- clm(rating ~ cls + spinout + social_care + age + dissolved,
data = filter(cic_cleaned, domain == "Safe"),
link = "logit")
model_order_effective <- clm(rating ~ cls + spinout + social_care + age + dissolved,
data = filter(cic_cleaned, domain == "Effective"),
link = "logit")
model_order_caring <- clm(rating ~ cls + spinout + social_care + age + dissolved,
data = filter(cic_cleaned, domain == "Caring"),
link = "logit")
model_order_well_led <- clm(rating ~ cls + spinout + social_care + age + dissolved,
data = filter(cic_cleaned, domain == "Well-led"),
link = "logit")
model_order_responsive <- clm(rating ~ cls + spinout + social_care + age + dissolved,
data = filter(cic_cleaned, domain == "Responsive"),
link = "logit")
ordinal_models <-
modelsummary(
list(
"overall" = model_order_overall,
"safe" = model_order_safe,
"effective" = model_order_effective,
"caring" = model_order_caring,
"well-led" = model_order_well_led,
"responsive" = model_order_responsive
),
coef_omit = "region",
exponentiate = F,
statistic = "({p.value}) {stars}")
ordinal_models
tinytable_k502zsxz8pkka343ympg
| |
overall |
safe |
effective |
caring |
well-led |
responsive |
| Inadequate|Req improv |
-3.495 |
-3.316 |
-3.785 |
|
-3.517 |
|
| |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
|
(<0.001) *** |
|
| Req improv|Good |
-2.506 |
-1.498 |
-2.326 |
-5.341 |
-2.031 |
-3.916 |
| |
(<0.001) *** |
(0.003) ** |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
| Good|Outstanding |
1.424 |
4.126 |
2.788 |
0.669 |
1.843 |
1.466 |
| |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
(0.206) |
(<0.001) *** |
(0.008) ** |
| clsCLS |
0.478 |
0.588 |
0.266 |
-0.330 |
-0.041 |
-0.118 |
| |
(0.076) + |
(0.092) + |
(0.466) |
(0.368) |
(0.895) |
(0.764) |
| spinout |
-0.378 |
-0.323 |
-0.433 |
-0.184 |
-0.264 |
-0.626 |
| |
(0.068) + |
(0.344) |
(0.226) |
(0.600) |
(0.377) |
(0.109) |
| social_caresocial care |
0.090 |
0.972 |
0.145 |
-0.820 |
0.026 |
-0.098 |
| |
(0.734) |
(0.013) * |
(0.694) |
(0.040) * |
(0.933) |
(0.801) |
| age |
-0.120 |
-0.042 |
-0.051 |
-0.071 |
-0.025 |
-0.064 |
| |
(<0.001) *** |
(0.386) |
(0.266) |
(0.204) |
(0.563) |
(0.246) |
| dissolved1 |
0.082 |
-0.724 |
0.222 |
0.065 |
0.188 |
-0.011 |
| |
(0.803) |
(0.085) + |
(0.657) |
(0.896) |
(0.652) |
(0.983) |
| Num.Obs. |
540 |
261 |
261 |
261 |
261 |
261 |
| AIC |
906.8 |
341.4 |
332.7 |
274.7 |
447.6 |
269.9 |
| BIC |
941.1 |
369.9 |
361.2 |
299.7 |
476.1 |
294.8 |
| RMSE |
2.45 |
2.17 |
2.23 |
1.55 |
2.42 |
1.40 |
models with equity variables
Due to the large range/dispersion of the fiancial data. I standardize
the totalequity variable to enable the models to run.
summary(cic_cleaned$totalequity)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -6481922 36481 726352 2042766 3827016 9661966 45
models with total equity
eq_order_overall <- clm(rating ~ cls + spinout + social_care + age + dissolved + totalequity_std,
data = filter(cic_cleaned, domain == "Overall"),
link = "logit")
eq_order_safe <- clm(rating ~ cls + spinout + social_care + age + dissolved + totalequity_std,
data = filter(cic_cleaned, domain == "Safe"),
link = "logit")
eq_order_effective <- clm(rating ~ cls + spinout + social_care + age + dissolved + totalequity_std,
data = filter(cic_cleaned, domain == "Effective"),
link = "logit")
eq_order_caring <- clm(rating ~ cls + spinout + social_care + age + dissolved + totalequity_std,
data = filter(cic_cleaned, domain == "Caring"),
link = "logit")
eq_order_well_led <- clm(rating ~ cls + spinout + social_care + age + dissolved + totalequity_std,
data = filter(cic_cleaned, domain == "Well-led"),
link = "logit")
eq_order_responsive <- clm(rating ~ cls + spinout + social_care + age + dissolved + totalequity_std,
data = filter(cic_cleaned, domain == "Responsive"),
link = "logit")
eq_models <-
modelsummary(
list(
"overall" = eq_order_overall,
"safe" = eq_order_safe,
"effective" = eq_order_effective,
"caring" = eq_order_caring,
"well-led" = eq_order_well_led,
"responsive" = eq_order_responsive
),
coef_omit = "region",
exponentiate = F,
statistic = "({p.value}) {stars}")
eq_models
tinytable_n48qs45djadyft9jwtr4
| |
overall |
safe |
effective |
caring |
well-led |
responsive |
| Inadequate|Req improv |
-3.440 |
-3.426 |
-3.834 |
|
-3.579 |
|
| |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
|
(<0.001) *** |
|
| Req improv|Good |
-2.452 |
-1.587 |
-2.369 |
-5.388 |
-2.090 |
-3.960 |
| |
(<0.001) *** |
(0.002) ** |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
| Good|Outstanding |
1.483 |
4.083 |
2.764 |
0.633 |
1.801 |
1.433 |
| |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
(0.230) |
(<0.001) *** |
(0.009) ** |
| clsCLS |
0.450 |
0.626 |
0.297 |
-0.293 |
-0.007 |
-0.083 |
| |
(0.099) + |
(0.074) + |
(0.417) |
(0.430) |
(0.982) |
(0.834) |
| spinout |
-0.276 |
-0.574 |
-0.584 |
-0.292 |
-0.402 |
-0.735 |
| |
(0.267) |
(0.122) |
(0.132) |
(0.447) |
(0.213) |
(0.085) + |
| social_caresocial care |
0.035 |
1.253 |
0.325 |
-0.698 |
0.191 |
0.022 |
| |
(0.901) |
(0.003) ** |
(0.425) |
(0.109) |
(0.575) |
(0.959) |
| age |
-0.114 |
-0.057 |
-0.060 |
-0.081 |
-0.037 |
-0.073 |
| |
(<0.001) *** |
(0.234) |
(0.189) |
(0.161) |
(0.401) |
(0.197) |
| dissolved1 |
0.070 |
-0.665 |
0.263 |
0.095 |
0.232 |
0.017 |
| |
(0.832) |
(0.115) |
(0.599) |
(0.848) |
(0.579) |
(0.975) |
| totalequity_std |
-0.097 |
0.332 |
0.203 |
0.142 |
0.188 |
0.140 |
| |
(0.455) |
(0.080) + |
(0.309) |
(0.480) |
(0.263) |
(0.524) |
| Num.Obs. |
540 |
261 |
261 |
261 |
261 |
261 |
| AIC |
908.3 |
340.3 |
333.6 |
276.2 |
448.4 |
271.5 |
| BIC |
946.9 |
372.4 |
365.7 |
304.7 |
480.4 |
300.0 |
| RMSE |
2.45 |
2.17 |
2.23 |
1.55 |
2.42 |
1.40 |
models with current ratio
current_order_overall <- clm(rating ~ cls + spinout + social_care + age + dissolved + current_ratio,
data = filter(cic_cleaned, domain == "Overall"),
link = "logit")
current_order_safe <- clm(rating ~ cls + spinout + social_care + age + dissolved + current_ratio,
data = filter(cic_cleaned, domain == "Safe"),
link = "logit")
current_order_effective <- clm(rating ~ cls + spinout + social_care + age + dissolved + current_ratio,
data = filter(cic_cleaned, domain == "Effective"),
link = "logit")
current_order_caring <- clm(rating ~ cls + spinout + social_care + age + dissolved + current_ratio,
data = filter(cic_cleaned, domain == "Caring"),
link = "logit")
current_order_well_led <- clm(rating ~ cls + spinout + social_care + age + dissolved + current_ratio,
data = filter(cic_cleaned, domain == "Well-led"),
link = "logit")
current_order_responsive <- clm(rating ~ cls + spinout + social_care + age + dissolved + current_ratio,
data = filter(cic_cleaned, domain == "Responsive"),
link = "logit")
current_models <-
modelsummary(
list(
"overall" = current_order_overall,
"safe" = current_order_safe,
"effective" = current_order_effective,
"caring" = current_order_caring,
"well-led" = current_order_well_led,
"responsive" = current_order_responsive
),
coef_omit = "region",
exponentiate = F,
statistic = "({p.value}) {stars}")
current_models
tinytable_of4w3xa9up910qrxohpl
| |
overall |
safe |
effective |
caring |
well-led |
responsive |
| Inadequate|Req improv |
-3.373 |
-3.234 |
-3.624 |
|
-3.400 |
|
| |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
|
(<0.001) *** |
|
| Req improv|Good |
-2.381 |
-1.416 |
-2.162 |
-5.284 |
-1.912 |
-3.826 |
| |
(<0.001) *** |
(0.007) ** |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
| Good|Outstanding |
1.562 |
4.220 |
2.986 |
0.733 |
1.977 |
1.563 |
| |
(<0.001) *** |
(<0.001) *** |
(<0.001) *** |
(0.180) |
(<0.001) *** |
(0.006) ** |
| clsCLS |
0.503 |
0.610 |
0.324 |
-0.311 |
-0.001 |
-0.082 |
| |
(0.062) + |
(0.082) + |
(0.379) |
(0.400) |
(0.998) |
(0.837) |
| spinout |
-0.344 |
-0.306 |
-0.398 |
-0.169 |
-0.236 |
-0.606 |
| |
(0.099) + |
(0.371) |
(0.270) |
(0.631) |
(0.431) |
(0.123) |
| social_caresocial care |
0.080 |
0.953 |
0.108 |
-0.841 |
0.002 |
-0.120 |
| |
(0.763) |
(0.015) * |
(0.771) |
(0.037) * |
(0.994) |
(0.758) |
| age |
-0.120 |
-0.042 |
-0.049 |
-0.070 |
-0.024 |
-0.063 |
| |
(<0.001) *** |
(0.386) |
(0.280) |
(0.207) |
(0.575) |
(0.250) |
| dissolved1 |
0.114 |
-0.721 |
0.237 |
0.071 |
0.198 |
-0.002 |
| |
(0.731) |
(0.086) + |
(0.637) |
(0.886) |
(0.634) |
(0.997) |
| current_ratio |
0.048 |
0.037 |
0.063 |
0.025 |
0.045 |
0.033 |
| |
(0.134) |
(0.538) |
(0.186) |
(0.611) |
(0.296) |
(0.483) |
| Num.Obs. |
540 |
261 |
261 |
261 |
261 |
261 |
| AIC |
906.6 |
343.0 |
333.2 |
276.5 |
448.6 |
271.4 |
| BIC |
945.2 |
375.1 |
365.3 |
305.0 |
480.6 |
299.9 |
| RMSE |
2.45 |
2.17 |
2.23 |
1.55 |
2.42 |
1.40 |